import os
import shutil
import re
from pathlib import Path

def is_pali_paragraph(text):
    """Return True when *text* looks like a paragraph of romanized Pali.

    A paragraph is treated as Pali when it contains at least one of the
    diacritic letters used in Pali transliteration AND more than half of
    its characters are Latin letters (plain ASCII letters plus those
    diacritic letters).

    Args:
        text: The paragraph to classify. An empty string is never Pali.

    Returns:
        bool: True if the paragraph is judged to be Pali, False otherwise.
    """
    # Local import so this function does not depend on a file-level import.
    import unicodedata

    if not text:
        return False

    # Normalize the reference set too, so it is guaranteed to be precomposed.
    pali_chars = set(unicodedata.normalize('NFC', 'āīūṃṅñṭḍṇḷśṣḥ'))

    # NFC: decomposed input (base letter + combining mark) must match the
    # precomposed set. lower(): uppercase diacritics such as the first
    # letter of a sentence must be recognized as well.
    normalized = unicodedata.normalize('NFC', text).lower()

    pali_count = sum(1 for char in normalized if char in pali_chars)
    if pali_count == 0:
        return False

    # Diacritic letters are Latin letters too; leaving them out of the
    # numerator would penalize exactly the text this function looks for.
    ascii_count = len(re.findall(r'[a-z]', normalized))
    latin_ratio = (ascii_count + pali_count) / len(normalized)

    return latin_ratio > 0.5

def process_files(directory):
    """Remove Pali paragraphs from every ``.txt`` file under *directory*.

    The tree is walked recursively. Each ``.txt`` file is read as UTF-8 and
    split into paragraphs on blank lines (two consecutive newlines). Every
    paragraph accepted by ``is_pali_paragraph`` (after stripping surrounding
    whitespace) is dropped. When at least one paragraph was dropped, the
    remaining ones are written to a sibling ``<name>.temp`` file which then
    replaces the original. Files with nothing to remove are left untouched.

    Args:
        directory: Path of the root folder to process.

    Returns:
        None. Progress and per-file errors are reported with ``print``; an
        error in one file does not stop the remaining files.
    """
    if not os.path.exists(directory):
        print(f"目录 {directory} 不存在!")
        return

    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.txt'):
                continue

            file_path = os.path.join(root, file)
            temp_file = file_path + '.temp'

            try:
                # Read the whole file.
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Split into paragraphs on blank lines.
                paragraphs = content.split('\n\n')

                # Keep only the paragraphs that are not Pali.
                filtered_paragraphs = [
                    para for para in paragraphs
                    if not is_pali_paragraph(para.strip())
                ]

                # Filtering only ever removes items, so equal lengths mean
                # the content is unchanged and the rewrite can be skipped.
                if len(filtered_paragraphs) != len(paragraphs):
                    with open(temp_file, 'w', encoding='utf-8') as f:
                        f.write('\n\n'.join(filtered_paragraphs))

                    # os.replace overwrites the destination on every
                    # platform, so the original is never left half-written.
                    os.replace(temp_file, file_path)

                print(f"处理完成: {file_path}")

            except Exception as e:
                # Best effort per file: report, clean up, continue the walk.
                print(f"处理文件 {file_path} 时出错: {str(e)}")
                if os.path.exists(temp_file):
                    os.remove(temp_file)

def main():
    """Interactively ask for folders and process each one after confirmation.

    Loops until the user enters ``q`` (case-insensitive). For each path that
    exists, the user must confirm with ``y`` before ``process_files`` runs;
    any other answer cancels that run. A path that does not exist is
    reported and the prompt is shown again.
    """
    while True:
        directory = input("请输入要处理的文件夹路径（输入 'q' 退出）: ")

        # Quit request.
        if directory.lower() == 'q':
            return

        # Unknown path: report it and ask again.
        if not os.path.exists(directory):
            print("目录不存在，请重新输入！")
            continue

        answer = input(f"确认要处理目录 {directory} 中的所有txt文件吗？(y/n): ")

        # Anything other than an explicit "y" cancels this run.
        if answer.lower() != 'y':
            print("已取消操作。")
            continue

        process_files(directory)
        print("处理完成！")

# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
